In [1]:
# January 12, 2021
# PHYS 232
# Jake Bobowski

# This is a Jupyter Notebook used to write and execute Python code.
# A number sign at the start of a line is used to enter comments.  
# Commented lines are not part of the code and are not executed.
In [2]:
# Some of the most basic commands are intutiive, like addition...
3 + 5
Out[2]:
8
In [3]:
# and multiplication.
3*5
Out[3]:
15
In [4]:
# The notation used for powers is not obvious.
2**4
Out[4]:
16
In [5]:
# If, for some reason, you want to suppress the output from a line of code, use a semicolon after the line.
2**4;
In [6]:
# You might expect the command "sin(pi/2)" to result in an output of 1.  However, instead, you'll get an error.
sin(pi/2)
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-6-03b630e2be1f> in <module>
      1 # You might expect the command "sin(pi/2)" to result in an output of 1.  However, instead, you'll get an error.
----> 2 sin(pi/2)

NameError: name 'sin' is not defined
In [7]:
# To do something like the above operation, we first need to import the so-called "NumPy" module.  NumPy is very useful.
# If you're doing any kind of math in Python, you'll almost certainly want to import this module.
import numpy as np
In [8]:
# Now we can access pi using:
np.pi
Out[8]:
3.141592653589793
In [9]:
# We can also work with trig functions:
np.sin(np.pi/2)
Out[9]:
1.0
In [10]:
# In this Notebook, I won't try to introduce too much.  However, in Experiment #1 of PHYS 232 you will be asked
# to produce a Histogram from a list of data.  We will attempt to demonstrate enough that you will be able
# to complete this task.

# If you're interested in a broader set of Python tutorials that are useful for the kinds of data analysis tasks
# that you might be asked to do as a physics undergraduate, visit:
# https://people.ok.ubc.ca/jbobowsk/Python.html
# These tutorials are presented as complete scripts written in Python.
In [ ]:
# We need to load the data that we want to plot into an array.  The histogram data is in a text file.  
# First, from the menu bar select File -> Open.  The click on the "Upload" button.  Navigate to the
# desired file, select it and then complete the upload.

# The file that will be used in this example is called "hist data.txt".  It is a single column of
# data and it can be downloaded from the course website:
# https://people.ok.ubc.ca/jbobowsk/phys232.html
In [11]:
# To read the date from the file into an array, we will again use a command from NumPy: "np.loadtex()":
data = np.loadtxt("hist data.txt")
In [12]:
# If we want to see all of the elements of the array, we can...
data
Out[12]:
array([1.1560e-07, 1.4800e-07, 2.9930e-07, 1.5140e-07, 6.1300e-08,
       2.8900e-08, 1.4450e-07, 5.7800e-08, 5.7800e-08, 1.4450e-07,
       0.0000e+00, 2.3120e-07, 5.7800e-08, 0.0000e+00, 1.4450e-07,
       1.4450e-07, 2.8900e-08, 2.7040e-07, 2.8900e-08, 2.8900e-08,
       1.1560e-07, 1.1560e-07, 2.6010e-07, 1.1560e-07, 5.0500e-07,
       1.5140e-07, 1.4450e-07, 2.3810e-07, 0.0000e+00, 1.8320e-07,
       2.6650e-07, 1.6200e-08, 8.1000e-09, 3.6000e-07, 2.1380e-07,
       7.5700e-08, 1.3010e-07, 2.6010e-07, 9.6500e-08, 2.9930e-07,
       1.7810e-07, 1.8450e-07, 1.4450e-07, 0.0000e+00, 3.3800e-07,
       1.2890e-07, 9.6500e-08, 9.6500e-08, 3.2770e-07, 1.9300e-07,
       1.4450e-07, 2.5250e-07, 6.2180e-07, 4.9480e-07, 1.8490e-07,
       1.2200e-07, 1.1560e-07, 1.2800e-08, 1.8490e-07, 1.6200e-08,
       3.7000e-08, 5.7800e-08, 1.5140e-07, 3.6640e-07, 1.1437e-06,
       0.0000e+00, 1.3520e-07, 9.2450e-07, 1.2200e-07, 1.8490e-07,
       7.5700e-08, 1.9300e-07, 1.1560e-07, 3.6000e-07, 3.3290e-07,
       1.4450e-07, 2.6010e-07, 1.2250e-07, 6.4000e-09, 3.8890e-07,
       1.3010e-07, 1.3010e-07, 1.6200e-08, 1.1285e-06, 3.8800e-08,
       3.6130e-07, 6.4000e-09, 2.5250e-07, 1.0000e-07, 5.0500e-07,
       2.1380e-07, 5.7800e-08, 1.1560e-07, 1.4500e-08, 6.4000e-09,
       1.6200e-08, 3.5300e-08, 1.2890e-07, 9.1400e-08, 3.7000e-08,
       7.5700e-08, 2.8900e-08, 3.5300e-08, 1.2370e-07, 2.9930e-07,
       2.4400e-07, 3.8890e-07, 9.1400e-08, 1.4450e-07, 9.6500e-08,
       3.7000e-08, 1.2200e-07, 9.6500e-08, 1.4450e-07, 1.2200e-07,
       2.1380e-07, 1.2370e-07, 1.4450e-07, 9.1400e-08, 1.8450e-07,
       3.7000e-08, 5.7800e-08, 6.8900e-08, 7.2250e-07, 2.1380e-07,
       4.0500e-08, 4.2760e-07, 3.7000e-08, 1.8490e-07, 1.8490e-07,
       2.0880e-07, 3.6810e-07, 4.2250e-07, 2.7040e-07, 1.3060e-07,
       0.0000e+00, 3.2260e-07, 3.0050e-07, 3.0050e-07, 7.3280e-07,
       9.6500e-08, 5.4490e-07, 4.8250e-07, 6.7600e-08, 4.8250e-07,
       2.4740e-07, 1.2800e-08, 1.3010e-07, 1.2370e-07, 8.1000e-09,
       8.1000e-09, 5.7800e-08, 3.7000e-08, 5.7800e-08, 2.5250e-07,
       9.4900e-08, 6.7600e-08, 5.7800e-08, 3.5300e-08, 1.6200e-08,
       9.6500e-08, 3.6980e-07, 1.9300e-07, 3.5300e-08, 2.8900e-08,
       3.6640e-07, 5.4370e-07, 1.3520e-07, 3.5300e-08, 3.5300e-08,
       5.7800e-08, 6.4000e-09, 7.5700e-08, 9.6500e-08, 1.9010e-07,
       3.3800e-07, 6.2500e-08, 7.0600e-08, 3.3290e-07, 5.9290e-07,
       1.4450e-07, 2.1380e-07, 2.7680e-07, 1.9010e-07, 1.9130e-07,
       4.2760e-07, 3.6810e-07, 6.7600e-08, 1.7810e-07, 1.7640e-07,
       6.8900e-08, 2.3810e-07, 1.3520e-07, 1.2370e-07, 6.1300e-08,
       1.1560e-07, 1.4450e-07, 5.7800e-08, 7.4000e-08, 7.4000e-08,
       1.9300e-07, 3.7000e-08, 2.1380e-07, 6.2500e-08, 1.8280e-07,
       2.8900e-08, 1.0000e-07, 6.4000e-09, 3.7000e-08, 1.4450e-07,
       2.1380e-07, 5.3000e-07, 1.8490e-07, 1.8320e-07, 2.7680e-07,
       3.6810e-07, 6.4000e-09, 6.7600e-08, 2.8900e-08, 3.5300e-08,
       2.6820e-07, 1.1560e-07, 9.1400e-08, 2.0530e-07, 6.8900e-08,
       3.2770e-07, 3.7000e-08, 7.5700e-08, 1.3010e-07, 1.5140e-07,
       1.2800e-08, 2.8900e-07, 1.3010e-07, 3.7000e-08, 1.8320e-07,
       2.4740e-07, 3.6810e-07, 6.7600e-08, 2.5250e-07, 7.4000e-08,
       3.8890e-07, 2.5250e-07, 1.2890e-07, 1.0100e-06, 1.3010e-07,
       1.2250e-07, 2.8900e-07, 1.4500e-08, 2.3810e-07, 9.6500e-08,
       3.7000e-08, 1.4500e-08, 7.5700e-08, 4.9480e-07, 2.8900e-07,
       1.2370e-07, 3.5300e-08, 6.2180e-07, 2.8900e-08, 9.6500e-08,
       3.7000e-08, 2.6010e-07, 1.8490e-07, 4.2250e-07, 1.2500e-07,
       1.1560e-07, 6.2500e-08, 9.6500e-08, 8.1000e-09, 1.2200e-07,
       7.5700e-08, 2.8900e-07, 4.4500e-07, 1.9010e-07, 1.3010e-07,
       1.4450e-07, 5.9290e-07, 1.8320e-07, 4.7560e-07, 7.5700e-08,
       6.6050e-07, 1.7810e-07, 4.2250e-07, 2.5250e-07, 2.5250e-07,
       1.4450e-07, 2.0880e-07, 2.4500e-07, 1.8490e-07, 6.1300e-08,
       1.4500e-08, 3.2770e-07, 2.8900e-08, 1.2370e-07, 3.7000e-08,
       1.7640e-07, 3.7000e-08, 6.7600e-08, 3.2770e-07, 1.5140e-07,
       1.8490e-07, 2.4740e-07, 1.4450e-07, 3.5300e-08, 6.4000e-09,
       3.0740e-07, 3.7000e-08, 8.1000e-09, 3.0050e-07, 2.8900e-08,
       5.4490e-07])
In [13]:
# The np.shape() command tells us that we have 311 elements in our 1-D array.
np.shape(data)
Out[13]:
(311,)
In [15]:
# We can choose to look at only the nth element...
data[40]
Out[15]:
1.781e-07
In [16]:
# or we can choose to view a range of elements...
data[40:60]
Out[16]:
array([1.781e-07, 1.845e-07, 1.445e-07, 0.000e+00, 3.380e-07, 1.289e-07,
       9.650e-08, 9.650e-08, 3.277e-07, 1.930e-07, 1.445e-07, 2.525e-07,
       6.218e-07, 4.948e-07, 1.849e-07, 1.220e-07, 1.156e-07, 1.280e-08,
       1.849e-07, 1.620e-08])
In [18]:
# or we can choose to view only every 4th element with an certain range.
data[40:60:4]
Out[18]:
array([1.781e-07, 3.380e-07, 3.277e-07, 6.218e-07, 1.156e-07])
In [20]:
# For plotting, we will make use of another module.  This one is called "Matplotlib".
# Matplotlib use plotting commands that are very similar (but not always identical) 
# to those used by MATLAB.
import matplotlib.pyplot as plt
In [34]:
# Here is a histogram of the entries contained in the array called data.
plt.hist(data)
Out[34]:
(array([123., 101.,  46.,  19.,  11.,   5.,   2.,   0.,   2.,   2.]),
 array([0.00000e+00, 1.14370e-07, 2.28740e-07, 3.43110e-07, 4.57480e-07,
        5.71850e-07, 6.86220e-07, 8.00590e-07, 9.14960e-07, 1.02933e-06,
        1.14370e-06]),
 <BarContainer object of 10 artists>)
In [63]:
# By default, plt.hist() has given us 10 bins.  We could use an option to specify a different 
# number of bins.  We'll we're at it, we can also use other options to format the histogram.
# Note that 'k' is for black (because 'b' is for blue).
nbins = 15
plt.hist(data, nbins, color='lightskyblue', edgecolor='k')
Out[63]:
(array([105.,  80.,  39.,  36.,  20.,   9.,   8.,   5.,   3.,   2.,   0.,
          0.,   1.,   1.,   2.]),
 array([0.00000000e+00, 7.62466667e-08, 1.52493333e-07, 2.28740000e-07,
        3.04986667e-07, 3.81233333e-07, 4.57480000e-07, 5.33726667e-07,
        6.09973333e-07, 6.86220000e-07, 7.62466667e-07, 8.38713333e-07,
        9.14960000e-07, 9.91206667e-07, 1.06745333e-06, 1.14370000e-06]),
 <BarContainer object of 15 artists>)
In [ ]:
# Notice that, not only did we get a plot of the histogram, we also got three additional output.
# The first is an array of the number of counts in each bin.  The second is an array that specifies 
# the boundaries of the bins. That means, if there are N bins, there are N entries in the first array 
# and N+1 entries in the second array.  The third output we won't use, but you can, if you like,
# read about it in online documentation.  It can be used/manipulated to customize the look of the histogram.
In [64]:
# Some times it might be useful to assign these outputs names so that they can be used later.
# This can be done as follows.
counts, edges, patches = plt.hist(data, nbins, color='lightskyblue', edgecolor='k')
In [65]:
# We can now look at the counts and bin edges.  The "print()" command has been used to do some basic
# formatting of the output.  Also note that, without the explicit print() commands, the Jupyter
# Notebook will only display the output of the last statement in a cell. 
print("Counts:", counts)
print("Edges:", edges)
Counts: [105.  80.  39.  36.  20.   9.   8.   5.   3.   2.   0.   0.   1.   1.
   2.]
Edges: [0.00000000e+00 7.62466667e-08 1.52493333e-07 2.28740000e-07
 3.04986667e-07 3.81233333e-07 4.57480000e-07 5.33726667e-07
 6.09973333e-07 6.86220000e-07 7.62466667e-07 8.38713333e-07
 9.14960000e-07 9.91206667e-07 1.06745333e-06 1.14370000e-06]
In [66]:
# If we wanted, we could use the Edges to calculate the position of the centre of the bins.
# First note that the length of an array can be determined using "len()".
len(counts)
Out[66]:
15
In [67]:
# Here's how to add half a binwidth to the left boundary of each bin.
binwidth = edges[1] - edges[0]
centres = edges[0:len(counts)] + binwidth/2
print("Centres:", centres)
Centres: [3.81233333e-08 1.14370000e-07 1.90616667e-07 2.66863333e-07
 3.43110000e-07 4.19356667e-07 4.95603333e-07 5.71850000e-07
 6.48096667e-07 7.24343333e-07 8.00590000e-07 8.76836667e-07
 9.53083333e-07 1.02933000e-06 1.10557667e-06]
In [68]:
# Next, suppose that the data that we're plotting as a histgram came from a counting
# experiment.  In that case, as you will see later in PHYS 232, the uncertainty in
# the number of counts in each bin is given by the square root of the number of 
# counts in the bin.  We can use "np.sqrt()" to evaluate square roots of the elements
# in an array
err = np.sqrt(counts)
print("Error:", err)
Error: [10.24695077  8.94427191  6.244998    6.          4.47213595  3.
  2.82842712  2.23606798  1.73205081  1.41421356  0.          0.
  1.          1.          1.41421356]
In [75]:
# We can make a scatter plot with error bars using Matplotlib's "plt.errorbar()" command.
# I've used some options to formate the plot.
# fmt = 'k.' indicates that we want a scatter plot with points that are black.
# markersize = 12 specifies the size of the data points.
# linewidth = 1.5 specfies with width of the lines used to draw the error bars.
# capsize = 5 specifies the length of the horizontal parts of the error bars.
plt.errorbar(centres, counts, err, fmt = 'k.', markersize = 12, linewidth = 1.5, capsize = 5);
In [73]:
# If we follow the plt.hist() command by the plt.errorbar() command we will get a single graph displaying
# both plots.  I've also specified a markersize of zero for the scatterplot.
plt.hist(data, nbins, color='lightskyblue', edgecolor='k')
plt.errorbar(centres, counts, err, fmt = 'k.', markersize = 0, linewidth = 1.5, capsize = 5);
In [74]:
# Lastly, we can add x- and y-axis labels to out plot.
plt.hist(data, nbins, color='lightskyblue', edgecolor='k')
plt.errorbar(centres, counts, err, fmt = 'k.', markersize = 0, linewidth = 1.5, capsize = 5)
plt.xlabel('bin centres')
plt.ylabel('counts');
In [83]:
# Finally, if we want to save the plot to a separate file, we can use "plt.savefig()".
# The savefig() command needs to be in the same cell that was used to generate the plot.

# First, we regenerate the plot that was made in the previous cell...
plt.hist(data, nbins, color='lightskyblue', edgecolor='k')
plt.errorbar(centres, counts, err, fmt = 'k.', markersize = 0, linewidth = 1.5, capsize = 5)
plt.xlabel('bin centres')
plt.ylabel('counts')

# Next, we save a pdf copy...
plt.savefig('PHYS 232 Histogram Example.pdf')

# ...or a jpg copy...
plt.savefig('PHYS 232 Histogram Example.jpg')

# ...or a png copy...
plt.savefig('PHYS 232 Histogram Example.png')

# ...or an eps copy.
plt.savefig('PHYS 232 Histogram Example.eps')

# You can find (and download) this figures by using the File->Open menu and searching through the list of files.